* ---------------------------------------------------------------------------
* Session setup: close any log left open, start from an empty session and
* switch off output paging.
* ---------------------------------------------------------------------------
capture log close
clear all
set more off

* Run the directory-setup do-file.
* NOTE(review): presumably this is where the dir_log and dir_data globals used
* below are defined -- confirm in 0_Set_Directories.do.
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"


****************************************************************************
* Purpose of this do-file
*	- starts from capital_income_8299 (capital gains, income, dividends and
*	  RRSP-related variables)
*	- organizes the data and adds the derived variables needed to study the
*	  1982-1995 period
****************************************************************************

* Open the log for this do-file (any failure to open it is ignored).
capture log using "$dir_log\2_Organize_data.log", replace



****************************************************************************
* PART 1 -- individual file capital_income_8299
*   1. de-duplicate person-years
*   2. attach CPI, weights and death records; trim extremes
*   3. build per-person capital-gains summaries and two income measures
*   4. save the person-year file plus a one-row-per-person extract
use "$dir_data\capital_income_8299.dta"
****************************************************************************


* Step 1: ensure the data is clean, i.e. one observation per person-year
* (lin__i, year).
* dup = 0 when the person-year is unique, and 1,2,... inside a group of
* duplicates. The test has to be on _N==1: _N is never 0 inside a by-group,
* so a test on _N==0 can never fire and the tabulation could not tell unique
* records from the first record of a duplicated group.
* keep if dup<=1 retains exactly the first row of every (lin__i, year) group.
* NOTE(review): bysort does not fix the row order inside a group, so which
* duplicate survives is arbitrary if duplicates differ in other variables.
cap drop dup
quietly bysort lin__i year: gen dup=cond(_N==1,0,_n)
tab dup
keep if dup<=1
drop dup

* Step 2: merge with CPI (many person-years per calendar year; m:1 is the
* documented spelling of the many-to-one merge).
* Years found only in inflation.dta are discarded; master rows without a CPI
* match are kept.
sort  year
merge m:1 year using "$dir_data\inflation.dta"
keep if _merge==1 | _merge==3
drop _merge
drop cpi_2002_100 cpi_2017_100

* merge with weights (rows present only in weight.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\weight.dta"
drop if _merge==2
drop _merge

* merge with death date (rows present only in death_8299.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\death_8299.dta"
drop if _merge==2
drop _merge


* Step 2b: drop extremes
* - drop the person-year in which yod__i equals the tax year
*   (presumably the year of death -- TODO confirm the meaning of yod__i)
* - floor negative capital gains (clkgli, clkgxi) at zero
* - drop person-years whose CPI-adjusted clkgxi exceeds 500000*2.3388
* NOTE(review): Stata treats missing as larger than any number, so the last
* filter also drops rows where clkgxi or cpi_to2016 is missing -- confirm
* that this is intended.
drop if yod__i==year
replace clkgli=0 if clkgli<0
replace clkgxi=0 if clkgxi<0
drop if clkgxi*cpi_to2016> 500000*2.3388


* Step 3: create new variables

* a) CPI-adjusted capital gains in 1982-84: per-person maximum and mean.
*    temp2 is missing outside 1982-84, so only those years enter the egen
*    statistics, and the result is spread to every row of the person.

*gen temp1=clkgli if year>=1982 & year<=1984
gen temp2=clkgxi*cpi_to2016 if year>=1982 & year<=1984
*gen x=temp1/temp2
*sum x, d
* formula is correct for net and gross capital gains - 50%
*
bysort lin__i: egen max_capgains8284=max(temp2)
bysort lin__i: egen mean_capgains8284=mean(temp2)
drop temp2

/*
bysort lin__i: egen min_capgains8284=min(temp2)
replace temp2=temp2*cpi_to2016
bysort lin__i: egen min_capgains8284_ia=min(temp2)
bysort lin__i: egen max_capgains8284_ia=max(temp2)
bysort lin__i: egen mean_capgains8284_ia=mean(temp2)
*/

* b) per-person totals of CPI-adjusted capital gains over 1985-1991 and
*    over 1985-1989 (egen total() treats missing as zero, so a person with
*    no rows in the window gets 0, not missing).

gen temp=clkgxi*cpi_to2016 if year>=1985 & year<=1991
bysort lin__i: egen total_8591=total(temp)
drop temp

gen temp=clkgxi*cpi_to2016 if year>=1985 & year<=1989
bysort lin__i: egen total_8589=total(temp)
drop temp


* c) income variables
*    taxinc_cg = txi__i + ggex_i + klpyc - clkgli, falling back to a shorter
*    sum when ggex_i is missing, or when both ggex_i and klpyc are missing.
*    NOTE(review): when klpyc is missing but ggex_i is not, none of the
*    fallbacks applies and taxinc_cg stays missing -- confirm this is intended.

gen taxinc_cg=txi__i+ggex_i+klpyc-clkgli 
replace taxinc_cg=txi__i+klpyc-clkgli  if ggex_i==. 
replace taxinc_cg=txi__i-clkgli  if ggex_i==.  & klpyc==.
*
*    agi_cg = tirc_i net of clkgli, xdiv_i and invi_i
gen agi_cg=tirc_i-clkgli-xdiv_i-invi_i


sort lin__i year
save "$dir_data\capital_income_final_8299.dta", replace


* Step 4 : save sub file for merge with 2000-2016 data
* Only the person identifier and the person-level summaries are kept; they
* are constant within lin__i, so keeping the first row per person loses
* nothing.

keep total_8591 total_8589 lin__i max_capgains8284 mean_capgains8284 

quietly bysort lin__i : gen dup=cond(_N==1,0,_n)
keep if dup<=1
drop dup

save "$dir_data\capital_income_formerge.dta", replace


****************************************************************************
* PART 2 -- individual file capital_income_0016
*   Same cleaning and merges as for capital_income_8299, then the per-person
*   summaries saved in capital_income_formerge.dta are attached.
clear
use "$dir_data\capital_income_0016.dta"
****************************************************************************


*sample 10 
*save "$dir_data\capital_income_sample_0016.dta", replace

* Step 1: ensure the data is clean, i.e. one observation per person-year
* (lin__i, year).
* dup = 0 when the person-year is unique, and 1,2,... inside a group of
* duplicates (_N is never 0 inside a by-group, so the test must be on _N==1).
* keep if dup<=1 retains exactly the first row of every group.
cap drop dup
quietly bysort lin__i year: gen dup=cond(_N==1,0,_n)
tab dup
keep if dup<=1
drop dup

* Step 2: merge with CPI (many person-years per calendar year; m:1 is the
* documented spelling of the many-to-one merge). Years found only in
* inflation.dta are discarded.
sort  year
merge m:1 year using "$dir_data\inflation.dta"
keep if _merge==1 | _merge==3
drop _merge
drop cpi_2002_100 cpi_2017_100

* merge with weights (rows present only in weight.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\weight.dta"
drop if _merge==2
drop _merge

* merge with death date (rows present only in death_0016.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\death_0016.dta"
drop if _merge==2
drop _merge

* Step 2b: drop extremes
* - drop the person-year in which yod__i equals the tax year
*   (presumably the year of death -- TODO confirm the meaning of yod__i)
* - floor negative capital gains (clkgli, clkgxi) at zero
* - drop person-years whose CPI-adjusted clkgxi exceeds 500000*2.3388
* NOTE(review): Stata treats missing as larger than any number, so the last
* filter also drops rows where clkgxi or cpi_to2016 is missing -- confirm
* that this is intended.
drop if yod__i==year
replace clkgli=0 if clkgli<0
replace clkgxi=0 if clkgxi<0
drop if clkgxi*cpi_to2016> 500000*2.3388


* Step 3: create new variables

* a) attach the per-person capital-gains summaries stored in
*    capital_income_formerge.dta (one row per lin__i). Persons found only in
*    that file are discarded; persons without a match keep missing values.

sort lin__i 
merge m:1 lin__i using "$dir_data\capital_income_formerge.dta"
keep if _merge==1 | _merge==3
drop _merge


* c) income variables
*    taxinc_cg = txi__i + ggex_i + klpyc - clkgli, falling back to a shorter
*    sum when ggex_i is missing, or when both ggex_i and klpyc are missing.
*    NOTE(review): when klpyc is missing but ggex_i is not, none of the
*    fallbacks applies and taxinc_cg stays missing -- confirm this is intended.

gen taxinc_cg=txi__i+ggex_i+klpyc-clkgli 
replace taxinc_cg=txi__i+klpyc-clkgli  if ggex_i==. 
replace taxinc_cg=txi__i-clkgli  if ggex_i==.  & klpyc==.
*
*    agi_cg = tirc_i net of clkgli, xdiv_i and invi_i
gen agi_cg=tirc_i-clkgli-xdiv_i-invi_i





save "$dir_data\capital_income_final_0016.dta", replace

* NOW REPEAT WITH SPOUSAL DATA


****************************************************************************
* PART 3 -- spousal file capital_income_spousal_8299
*   Same cleaning and merges as for the individual file, applied to the
*   capital-gains variable clkgxp, followed by per-person summaries.
* The clear option makes the load independent of whether the data currently
* in memory have been saved.
use "$dir_data\capital_income_spousal_8299.dta", clear
****************************************************************************


* Step 1: ensure the data is clean, i.e. one observation per person-year
* (lin__i, year).
* dup = 0 when the person-year is unique, and 1,2,... inside a group of
* duplicates (_N is never 0 inside a by-group, so the test must be on _N==1).
* keep if dup<=1 retains exactly the first row of every group.
cap drop dup
quietly bysort lin__i year: gen dup=cond(_N==1,0,_n)
tab dup
keep if dup<=1
drop dup

* Step 2: merge with CPI (many person-years per calendar year; m:1 is the
* documented spelling of the many-to-one merge). Years found only in
* inflation.dta are discarded.
sort  year
merge m:1 year using "$dir_data\inflation.dta"
keep if _merge==1 | _merge==3
drop _merge
drop cpi_2002_100 cpi_2017_100

* merge with weights (rows present only in weight.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\weight.dta"
drop if _merge==2
drop _merge

* merge with death date (rows present only in death_8299.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\death_8299.dta"
drop if _merge==2
drop _merge


* Step 2b: drop extremes
* - drop the person-year in which yod__i equals the tax year
*   (presumably the year of death -- TODO confirm the meaning of yod__i)
* - floor negative clkgxp at zero
* - drop person-years whose CPI-adjusted clkgxp exceeds 500000*2.3388
* NOTE(review): Stata treats missing as larger than any number, so the last
* filter also drops rows where clkgxp or cpi_to2016 is missing -- confirm
* that this is intended.
drop if yod__i==year
replace clkgxp=0 if clkgxp<0
drop if clkgxp*cpi_to2016> 500000*2.3388


* Step 3: create new variables

* a) calculate values of capital gains in 1982-93:
*    temp2 starts as clkgxp for 1982-1993 and is blanked in years where the
*    rescaled ggex_ amount exceeds a year-specific threshold:
*        1985       ggex_/0.5   >  50000
*        1986-1987  ggex_/0.5   > 100000
*        1988-1989  ggex_/0.667 > 200000
*        1990-1994  ggex_/0.75  > 200000
*    NOTE(review): ggex_ is an abbreviated variable name; it only resolves if
*    exactly one variable in this file starts with ggex_ and variable
*    abbreviation is on -- confirm which variable is meant and spell it out.
*    NOTE(review): the divisors look like capital-gains inclusion rates --
*    confirm. A missing ggex_ compares as larger than any threshold, so
*    those years are blanked as well.
*    The last rule names 1994, but temp2 is only defined through 1993, so
*    1994 is unaffected.

gen temp2=clkgxp if year>=1982 & year<=1993
replace temp2=. if ggex_/0.5>50000 & (year==1985 )
replace temp2=. if ggex_/0.5>100000 & (year>=1986 & year<=1987 )
replace temp2=. if  ggex_/0.667 >200000 & (year>=1988 & year<=1989)
replace temp2=. if  ggex_/0.75  >200000 & (year>=1990 & year<=1994)
*
* convert the retained values using cpi_to2016
replace temp2=temp2*cpi_to2016 if year>=1982 & year<=1993
*
* per-person mean over the retained 1982-1993 years
bysort lin__i: egen mean_capgains8293=mean(temp2)
drop temp2
* per-person count of 1982-1993 years with clkgxp different from zero
* NOTE(review): a missing clkgxp also satisfies clkgxp!=0 and is counted --
* confirm that this is intended.
gen temp2=(clkgxp!=0) if year>=1982 & year<=1993
bysort lin__i: egen n_capgains8293=total(temp2)
drop temp2

* b) per-person total of CPI-adjusted capital gains over 1985-1989
*    (egen total() treats missing as zero).


gen temp=clkgxp*cpi_to2016 if year>=1985 & year<=1989
bysort lin__i: egen total_8589=total(temp)
drop temp

save "$dir_data\capital_income_spousal_final_8299.dta", replace

* Step 4 : save sub file for merge with 2000-2016 data
* Only the person identifier and the person-level summaries are kept; they
* are constant within lin__i, so keeping the first row per person loses
* nothing.

keep  total_8589 lin__i mean_capgains8293 n_capgains8293 

quietly bysort lin__i : gen dup=cond(_N==1,0,_n)
keep if dup<=1
drop dup

save "$dir_data\capital_income_spousal_formerge.dta", replace


****************************************************************************
* PART 4 -- spousal file capital_income_spousal_0016
*   Same cleaning and merges as for the other files, applied to clkgxp, then
*   per-person summaries from an earlier extract are attached.
clear
use "$dir_data\capital_income_spousal_0016.dta"
****************************************************************************


*sample 10 
*save "$dir_data\capital_income_sample_0016.dta", replace

* Step 1: ensure the data is clean, i.e. one observation per person-year
* (lin__i, year).
* dup = 0 when the person-year is unique, and 1,2,... inside a group of
* duplicates (_N is never 0 inside a by-group, so the test must be on _N==1).
* keep if dup<=1 retains exactly the first row of every group.
cap drop dup
quietly bysort lin__i year: gen dup=cond(_N==1,0,_n)
tab dup
keep if dup<=1
drop dup

* Step 2: merge with CPI (many person-years per calendar year; m:1 is the
* documented spelling of the many-to-one merge). Years found only in
* inflation.dta are discarded.
sort  year
merge m:1 year using "$dir_data\inflation.dta"
keep if _merge==1 | _merge==3
drop _merge
drop cpi_2002_100 cpi_2017_100

* merge with weights (rows present only in weight.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\weight.dta"
drop if _merge==2
drop _merge

* merge with death date (rows present only in death_0016.dta are discarded)
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\death_0016.dta"
drop if _merge==2
drop _merge

* Step 2b: drop extremes
* - drop the person-year in which yod__i equals the tax year
*   (presumably the year of death -- TODO confirm the meaning of yod__i)
* - floor negative clkgxp at zero
* - drop person-years whose CPI-adjusted clkgxp exceeds 500000*2.3388
* NOTE(review): Stata treats missing as larger than any number, so the last
* filter also drops rows where clkgxp or cpi_to2016 is missing -- confirm
* that this is intended.
drop if yod__i==year
replace clkgxp=0 if clkgxp<0
drop if clkgxp*cpi_to2016> 500000*2.3388


* Step 3: create new variables

* a) attach per-person capital-gains summaries (one row per lin__i). Persons
*    found only in the using file are discarded.
*    NOTE(review): this merges capital_income_formerge.dta, the extract built
*    from the individual 8299 file, not capital_income_spousal_formerge.dta,
*    which this do-file builds from the spousal 8299 file. Confirm which
*    extract is intended before changing it; the variables carried into the
*    saved file depend on this choice.

sort lin__i 
merge m:1 lin__i using "$dir_data\capital_income_formerge.dta"
keep if _merge==1 | _merge==3
drop _merge



save "$dir_data\capital_income_spousal_final_0016.dta", replace













* Close the log. The log is opened under capture, so it may not be open here;
* capture keeps a missing log from aborting the do-file after all data have
* been saved.
capture log close
